package com.dxf.day01

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Word-count job: reads a text file (from HDFS by default), counts how often each
 * whitespace-separated token occurs, and prints the `(word, count)` pairs to standard
 * output in descending order of count.
 */
object HdfsWordCount {

  /** Input location read when no path is supplied on the command line. */
  private val DefaultInputPath = "hdfs://xxx"

  /**
   * Runs the word count.
   *
   * @param args optional command-line arguments; when present, `args(0)` is used as the
   *             input path instead of `DefaultInputPath`
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val conf = new SparkConf().setAppName("app").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      // Load the data directly from HDFS (or from whichever path was supplied) for processing.
      val lines = sc.textFile(inputPath)

      val counts = lines
        .flatMap(_.split("\\s+"))
        // Blank lines and lines with leading whitespace make split emit an empty token;
        // drop it so "" is not reported as a word.
        .filter(_.nonEmpty)
        .map((_, 1))
        .reduceByKey(_ + _)
        .sortBy(_._2, ascending = false)

      // Bring the result to the driver before printing: RDD.foreach executes inside the
      // tasks, so on its own it neither guarantees driver-side output nor the sorted order.
      // NOTE(review): collect() materialises every distinct word on the driver — assumed
      // to fit in memory for this job; confirm for large vocabularies.
      counts.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

}
